# -*- coding: utf-8 -*-
__author__ = 'qh'

import time
import pandas as pd
import numpy as np
import os

'''
加载数据
'''


def userStat():
    """Load the sample behaviour log as a NumPy array.

    The log is read from ``data/sample_log.csv`` located in the parent
    directory of the current working directory.

    Returns:
        A 2-D array holding every row and column of the CSV, in file order.
    """
    parent_path = os.path.dirname(os.getcwd())
    # os.path.join keeps the path valid on every platform; a literal
    # '\data\...' string only resolves on Windows.
    log_path = os.path.join(parent_path, 'data', 'sample_log.csv')
    log = pd.read_csv(log_path)
    # sample_info.csv is not loaded: nothing in this function uses it.
    return np.array(log.values)


'''
提取特征
'''


def getFeature(data=None, endDay=200):
    """Build the 86-column behaviour feature matrix, one row per user.

    Args:
        data: 2-D array of log rows whose columns are, in order, user_id,
            item_id, cat_id, seller_id, brand_id, time_stamp, action_type.
            time_stamp is used as a day number; action_type 0/1/2/3 means
            click / purchase / collect / add-to-cart. When None, the log is
            loaded with userStat().
        endDay: day number that features 48 and 49 are measured back from.

    Returns:
        A float array of shape (number of users, 86). Rows follow the sorted
        unique user ids. Columns:

        0-3    number of click, purchase, collect, cart records
        4-7    share of each of those among the four (0 if there are none)
        8, 9   distinct active days, distinct purchase days
        10     distinct purchase days / distinct active days
        11     records of a purchased item dated on or before the purchase
               day, averaged per purchase (-1 without purchases)
        12     distinct days of those records per purchase (-1 likewise)
        13     number of (purchase, later purchase) pairs one day apart
        14     total of columns 0-3
        15, 16 weighted sum of columns 0-3 (0.05, 0.5, 0.3, 0.15) and the
               sum of the squared weighted terms
        17-19  mean, max, min gap between chronologically adjacent records
        20-35  distinct items, categories, brands, sellers that were
               clicked (20-23), purchased (24-27), collected (28-31),
               added to cart (32-35)
        36-39  midweek / weekend record counts, then distinct-day counts
               (a day counts as midweek when day % 7 <= 4)
        40-43  the same four numbers for purchase records only
        44-47  purchase/all ratios of 40-41 over 36-37 and 42-43 over
               38-39 (-1 when the user has no record in that part of the
               week)
        48, 49 endDay minus the first / last active day
        50-64  max, min, mean over active days of: record count, distinct
               items, distinct categories, distinct sellers, distinct brands
        65-79  the same fifteen numbers over purchase records only
               (-1 without purchases)
        80-85  per purchase: count and distinct days of earlier-or-same-day
               click, collect and cart records of the purchased item
               (-1 without purchases)
    """
    if data is None:
        data = userStat()
    data = np.asarray(data)

    # A stable sort puts each user's rows next to each other while keeping
    # their original relative order, so every user is a plain slice instead
    # of a fresh scan over the whole log.
    order = np.argsort(data[:, 0], kind='mergesort')
    ordered = data[order]
    userSet, starts = np.unique(ordered[:, 0], return_index=True)
    print(userSet)
    stops = np.append(starts[1:], ordered.shape[0])

    feature = np.zeros((len(userSet), 86))
    for index, (start, stop) in enumerate(zip(starts, stops)):
        feature[index, :] = _userFeatureRow(ordered[start:stop], endDay)
    return feature


def _userFeatureRow(rows, endDay):
    """Compute the 86 features of one user from that user's log rows."""
    row = np.zeros(86)
    items, cats, sellers, brands = rows[:, 1], rows[:, 2], rows[:, 3], rows[:, 4]
    days = rows[:, 5].astype(float)
    actions = rows[:, 6].astype(float)

    isClick, isBuy, isCollect, isCart = [actions == code for code in range(4)]
    actionMasks = (isClick, isBuy, isCollect, isCart)
    # Float counts keep every later ratio a true division on Python 2 too.
    counts = np.array([np.count_nonzero(mask) for mask in actionMasks],
                      dtype=float)
    total = counts.sum()
    purchases = counts[1]

    row[0:4] = counts
    if total > 0:
        row[4:8] = counts / total

    buyDays = days[isBuy]
    row[8] = np.unique(days).shape[0]
    row[9] = np.unique(buyDays).shape[0]
    row[10] = row[9] / row[8]

    # For every purchase, look at this user's records of the same item dated
    # no later than the purchase day (the purchase record itself included).
    # Slots: all records, clicks, collects, cart additions.
    priorStats = [[0, set()] for _ in range(4)]
    nextDayBuys = 0
    for buyItem, buyDay in zip(items[isBuy], buyDays):
        prior = (items == buyItem) & (days <= buyDay)
        slotMasks = (prior, prior & isClick, prior & isCollect, prior & isCart)
        for slot, mask in enumerate(slotMasks):
            priorStats[slot][0] += np.count_nonzero(mask)
            priorStats[slot][1].update(days[mask].tolist())
        # Purchases whose day difference to this one truncates to exactly 1.
        nextDayBuys += np.count_nonzero(np.trunc(buyDays - buyDay) == 1)

    if purchases:
        row[11] = priorStats[0][0] / purchases
        row[12] = len(priorStats[0][1]) / purchases
        for first, (num, daySet) in zip((80, 82, 84), priorStats[1:]):
            row[first] = num / purchases
            row[first + 1] = len(daySet) / purchases
    else:
        row[11:13] = -1
        row[80:86] = -1

    row[13] = nextDayBuys
    row[14] = total
    weighted = np.array([0.05, 0.5, 0.3, 0.15]) * counts
    row[15] = weighted.sum()
    row[16] = np.square(weighted).sum()

    # Gaps are taken over the days in chronological order so they can never
    # be negative, whatever the row order of the log is.
    chronological = np.sort(days)
    if chronological.shape[0] > 1:
        gaps = np.diff(chronological)
    else:
        gaps = np.array([0])
    row[17:20] = gaps.mean(), gaps.max(), gaps.min()

    # Distinct ids touched per action type, in the column order
    # items, categories, brands, sellers.
    for first, mask in zip((20, 24, 28, 32), actionMasks):
        row[first:first + 4] = [np.unique(column[mask]).shape[0]
                                for column in (items, cats, brands, sellers)]

    midweek = (days % 7) <= 4
    weekParts = ((0, days[midweek], days[isBuy & midweek]),
                 (1, days[~midweek], days[isBuy & ~midweek]))
    for offset, online, bought in weekParts:
        onlineDayCount = np.unique(online).shape[0]
        boughtDayCount = np.unique(bought).shape[0]
        row[36 + offset] = online.shape[0]
        row[38 + offset] = onlineDayCount
        row[40 + offset] = bought.shape[0]
        row[42 + offset] = boughtDayCount
        if online.shape[0]:
            row[44 + offset] = bought.shape[0] / float(online.shape[0])
            row[46 + offset] = boughtDayCount / float(onlineDayCount)
        else:
            row[44 + offset] = -1
            row[46 + offset] = -1

    row[48] = endDay - int(days.min())
    row[49] = endDay - int(days.max())

    row[50:65] = _dailyStats(days, items, cats, sellers, brands)
    if purchases:
        row[65:80] = _dailyStats(buyDays, items[isBuy], cats[isBuy],
                                 sellers[isBuy], brands[isBuy])
    else:
        row[65:80] = -1
    return row


def _dailyStats(days, items, cats, sellers, brands):
    """Return max/min/mean per day of the record count and of the distinct
    item, category, seller and brand counts (fifteen numbers, in that order).
    """
    frame = pd.DataFrame({'day': days, 'item': items, 'cat': cats,
                          'seller': sellers, 'brand': brands})
    byDay = frame.groupby('day')
    perDaySeries = [byDay.size()]
    perDaySeries += [byDay[name].nunique()
                     for name in ('item', 'cat', 'seller', 'brand')]
    stats = []
    for perDay in perDaySeries:
        stats.extend([perDay.max(), perDay.min(), perDay.mean()])
    return stats


# Compute the intervals between consecutive elements of an array
def arrayInterval(arr):
    """Return the differences between neighbouring elements of ``arr``.

    Each output entry is ``arr[k + 1] - arr[k]``. A sequence with fewer than
    two elements has no neighbours, in which case a one-element array
    holding 0 is returned.
    """
    if len(arr) > 1:
        steps = [later - earlier for earlier, later in zip(arr, arr[1:])]
        return np.array(steps)
    return np.array([0])


# Build the feature matrix and report how long the extraction took.
t0 = time.time()
feature = getFeature()
pretime = time.time() - t0
print("time :  %.3f s" % pretime)
# Print the first user's feature row as a quick sanity check. A log without
# any rows yields a matrix with no rows, where indexing row 0 would raise
# IndexError, so the print is skipped in that case.
if len(feature) > 0:
    print(feature[0, :])
